/*
 * Copyright (c) 2016-2019 Cisco and/or its affiliates.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include <vlibmemory/api.h>
#include <vlib/vlib.h>

#include <vppinfra/hash.h>
#include <vppinfra/error.h>
#include <vppinfra/elog.h>

#include <vnet/vnet.h>
#include <vnet/ip/ip.h>
#include <vnet/udp/udp.h>
#include <vnet/udp/udp_packet.h>
#include <vnet/session/session.h>

/*
 * Error counter descriptors shared by all udp input nodes in this file.
 * Each udp_error (f, n, s, d) line of udp_error.def expands to one entry
 * made of the stringified name n, the description d and the severity
 * VL_COUNTER_SEVERITY_<s>. The f argument is not used by this expansion.
 */
static vlib_error_desc_t udp_error_counters[] = {
#define udp_error(f, n, s, d) { #n, d, VL_COUNTER_SEVERITY_##s },
#include "udp_error.def"
#undef udp_error
};

/* Per-packet trace record written by udp_trace_buffer () */
typedef struct
{
  /* connection_index of the session the packet was handled on, ~0 if none */
  u32 connection;
  /* UDP_ERROR_* value describing what happened to the packet */
  u32 disposition;
  /* thread of the session, or the processing thread if there is no session */
  clib_thread_index_t thread_index;
} udp_input_trace_t;

/*
 * Trace formatter registered as .format_trace for the udp input nodes.
 * Appends a one-line rendering of a udp_input_trace_t to s and returns
 * the resulting vector.
 */
static u8 *
format_udp_input_trace (u8 * s, va_list * args)
{
  udp_input_trace_t *tr;

  /* The vlib_main_t and vlib_node_t arguments are consumed but not used */
  (void) va_arg (*args, vlib_main_t *);
  (void) va_arg (*args, vlib_node_t *);
  tr = va_arg (*args, udp_input_trace_t *);

  return format (s, "UDP_INPUT: connection %d, disposition %d, thread %d",
		 tr->connection, tr->disposition, tr->thread_index);
}

/*
 * List of next nodes of the udp input nodes as _ (SYMBOL, "node-name")
 * pairs. Expanded once to build the enum below and once per node
 * registration to fill .next_nodes.
 */
#define foreach_udp_input_next			\
  _ (DROP, "error-drop")

/* Next-node indices; UDP_INPUT_N_NEXT is the number of next nodes */
typedef enum
{
#define _(s, n) UDP_INPUT_NEXT_##s,
  foreach_udp_input_next
#undef _
    UDP_INPUT_N_NEXT,
} udp_input_next_t;

always_inline void
udp_input_inc_counter (vlib_main_t * vm, u8 is_ip4, u8 evt, u8 val)
{
  if (is_ip4)
    vlib_node_increment_counter (vm, udp4_input_node.index, evt, val);
  else
    vlib_node_increment_counter (vm, udp6_input_node.index, evt, val);
}

/*
 * Flush a per-frame error count array into the node error counters.
 * cnts must have UDP_N_ERROR entries indexed by UDP_ERROR_* value; only
 * non-zero entries are pushed through udp_input_inc_counter ().
 * Wrapped in do/while (0) so it behaves as a single statement.
 */
#define udp_store_err_counters(vm, is_ip4, cnts)			\
  do									\
    {									\
      int _i;								\
      for (_i = 0; _i < UDP_N_ERROR; _i++)				\
	if ((cnts)[_i])							\
	  udp_input_inc_counter ((vm), (is_ip4), _i, (cnts)[_i]);	\
    }									\
  while (0)

/*
 * Add val to entry err of the per-frame error count array cnts.
 * Wrapped in do/while (0) so it behaves as a single statement.
 */
#define udp_inc_err_counter(cnts, err, val)				\
  do									\
    {									\
      (cnts)[(err)] += (val);						\
    }									\
  while (0)

/*
 * Add a udp_input_trace_t record for buffer b if the buffer is traced.
 * s may be null, in which case the connection is recorded as ~0 and the
 * thread as the one running vm. error0 is stored as the disposition.
 */
static void
udp_trace_buffer (vlib_main_t * vm, vlib_node_runtime_t * node,
		  vlib_buffer_t * b, session_t * s, u16 error0)
{
  udp_input_trace_t *tr;

  if (PREDICT_FALSE (b->flags & VLIB_BUFFER_IS_TRACED))
    {
      tr = vlib_add_trace (vm, node, b, sizeof (*tr));
      if (s)
	{
	  tr->connection = s->connection_index;
	  tr->disposition = error0;
	  tr->thread_index = s->thread_index;
	}
      else
	{
	  tr->connection = ~0;
	  tr->disposition = error0;
	  tr->thread_index = vm->thread_index;
	}
    }
}

/*
 * Allocate a connected udp connection on thread_index for the datagram
 * described by hdr, inheriting fib index, mss and cfg flags from listener,
 * and ask the session layer to accept it.
 *
 * Returns the new connection, or 0 if session_dgram_accept () fails (the
 * connection is freed in that case).
 */
static udp_connection_t *
udp_connection_accept (udp_connection_t *listener, session_dgram_hdr_t *hdr,
		       clib_thread_index_t thread_index)
{
  udp_connection_t *conn = udp_connection_alloc (thread_index);

  /* Endpoints come from the received datagram */
  ip_copy (&conn->c_lcl_ip, &hdr->lcl_ip, hdr->is_ip4);
  ip_copy (&conn->c_rmt_ip, &hdr->rmt_ip, hdr->is_ip4);
  conn->c_lcl_port = hdr->lcl_port;
  conn->c_rmt_port = hdr->rmt_port;
  conn->c_is_ip4 = hdr->is_ip4;

  /* Everything else is inherited from the listener */
  conn->c_fib_index = listener->c_fib_index;
  conn->mss = listener->mss;
  conn->flags |= UDP_CONN_F_CONNECTED;
  conn->cfg_flags = listener->cfg_flags;

  if (session_dgram_accept (&conn->connection, listener->c_s_index,
			    listener->c_thread_index) == 0)
    {
      udp_connection_share_port (conn->c_lcl_port, conn->c_is_ip4);
      return conn;
    }

  udp_connection_free (conn);
  return 0;
}

/*
 * Enqueue the datagram held in buffer b and described by hdr0 on session s0
 * of udp connection uc0, and update the connection's rx statistics.
 *
 * thread_index is the thread running the node; queue_event is forwarded to
 * the session layer enqueue functions.
 *
 * On failure *error0 is overwritten with UDP_ERROR_FIFO_FULL or
 * UDP_ERROR_FIFO_NOMEM and uc0->errors_in is incremented. On success
 * *error0 is left untouched and bytes_in / dgrams_in are updated.
 */
static void
udp_connection_enqueue (udp_connection_t *uc0, session_t *s0,
			session_dgram_hdr_t *hdr0,
			clib_thread_index_t thread_index, vlib_buffer_t *b,
			u8 queue_event, u32 *error0)
{
  int wrote0;

  /* Connection not flagged as connected: use the _cl enqueue variant,
   * serialized with the connection's rx_lock */
  if (!(uc0->flags & UDP_CONN_F_CONNECTED))
    {
      clib_spinlock_lock (&uc0->rx_lock);

      wrote0 = session_enqueue_dgram_connection_cl (
	s0, hdr0, b, TRANSPORT_PROTO_UDP, queue_event);

      clib_spinlock_unlock (&uc0->rx_lock);

      /* A zero return from the connectionless enqueue is reported as a
       * full fifo */
      if (PREDICT_FALSE (wrote0 == 0))
	{
	  *error0 = UDP_ERROR_FIFO_FULL;
	  uc0->errors_in += 1;
	}
      else
	{
	  /* wrote0 includes the dgram header; count payload bytes only */
	  uc0->bytes_in += wrote0 - sizeof (session_dgram_hdr_t);
	  uc0->dgrams_in += 1;
	}

      return;
    }

  /* Connected: make sure header plus payload fit before enqueueing */
  if (svm_fifo_max_enqueue_prod (s0->rx_fifo)
      < hdr0->data_length + sizeof (session_dgram_hdr_t))
    {
      *error0 = UDP_ERROR_FIFO_FULL;
      uc0->errors_in += 1;
      return;
    }

  /* If the session is owned by another thread, use the connection2 variant
   * and only request an rx event when one was asked for and the fifo does
   * not already have an event set.
   * NOTE(review): the original comment mentioned still holding "the peeker
   * lock"; no lock is taken in this function - confirm against callers. */
  if (s0->thread_index != thread_index)
    {
      wrote0 = session_enqueue_dgram_connection2 (
	s0, hdr0, b, TRANSPORT_PROTO_UDP,
	queue_event && !svm_fifo_has_event (s0->rx_fifo));
    }
  else
    {
      wrote0 = session_enqueue_dgram_connection (
	s0, hdr0, b, TRANSPORT_PROTO_UDP, queue_event);
    }

  /* In some rare cases, session_enqueue_dgram_connection can fail because a
   * chunk cannot be allocated in the RX FIFO */
  if (PREDICT_FALSE (wrote0 == 0))
    {
      *error0 = UDP_ERROR_FIFO_NOMEM;
      uc0->errors_in += 1;
    }
  else
    {
      /* wrote0 includes the dgram header; count payload bytes only */
      uc0->bytes_in += wrote0 - sizeof (session_dgram_hdr_t);
      uc0->dgrams_in += 1;
    }
}

/*
 * Fill hdr from the ip and udp headers located just before the buffer's
 * current data pointer and return the session the datagram is destined to.
 *
 * With is_nolookup set, the session is taken from the handle stored in the
 * buffer's udp.session_handle. Otherwise it is looked up by fib index,
 * addresses, ports and protocol.
 *
 * Side effects on b: sw_if_index[VLIB_RX] is overwritten with
 * ip.rx_sw_if_index and the buffer length fields are rewritten from the
 * payload length derived from the ip header.
 *
 * Returns the session pointer obtained from the lookup or the handle; the
 * caller in this file treats a null result as "no listener".
 */
always_inline session_t *
udp_parse_and_lookup_buffer (vlib_buffer_t *b, session_dgram_hdr_t *hdr,
			     u8 is_ip4, u8 is_nolookup)
{
  udp_header_t *udp;
  u32 fib_index;
  session_t *s;

  /* udp_local hands us a pointer to the udp data */
  udp = (udp_header_t *) (vlib_buffer_get_current (b) - sizeof (*udp));
  fib_index = vnet_buffer (b)->ip.fib_index;

  /* Local side is the packet's destination, remote side its source */
  hdr->data_offset = 0;
  hdr->lcl_port = udp->dst_port;
  hdr->rmt_port = udp->src_port;
  hdr->is_ip4 = is_ip4;
  hdr->gso_size = 0;

  if (is_ip4)
    {
      ip4_header_t *ip4;

      /* TODO: must fix once udp_local does ip options correctly */
      /* Assumes a fixed-size ip4 header (no options) in front of udp */
      ip4 = (ip4_header_t *) (((u8 *) udp) - sizeof (*ip4));
      ip_set (&hdr->lcl_ip, &ip4->dst_address, 1);
      ip_set (&hdr->rmt_ip, &ip4->src_address, 1);
      /* ip4 length covers ip and udp headers; strip both.
       * NOTE(review): no check here that length >= header sizes;
       * presumably validated by an earlier node - confirm */
      hdr->data_length = clib_net_to_host_u16 (ip4->length);
      hdr->data_length -= sizeof (ip4_header_t) + sizeof (udp_header_t);
      if (!is_nolookup)
	s = session_lookup_safe4 (fib_index, &ip4->dst_address,
				  &ip4->src_address, udp->dst_port,
				  udp->src_port, TRANSPORT_PROTO_UDP);
    }
  else
    {
      ip6_header_t *ip60;

      ip60 = (ip6_header_t *) (((u8 *) udp) - sizeof (*ip60));
      ip_set (&hdr->lcl_ip, &ip60->dst_address, 0);
      ip_set (&hdr->rmt_ip, &ip60->src_address, 0);
      /* Only the udp header is subtracted from the ip6 payload length */
      hdr->data_length = clib_net_to_host_u16 (ip60->payload_length);
      hdr->data_length -= sizeof (udp_header_t);
      if (!is_nolookup)
	s = session_lookup_safe6 (fib_index, &ip60->dst_address,
				  &ip60->src_address, udp->dst_port,
				  udp->src_port, TRANSPORT_PROTO_UDP);
    }

  /* No lookup requested: the session handle is carried in the buffer */
  if (is_nolookup)
    s = session_get_from_handle (vnet_buffer (b)->udp.session_handle);

  /* Set the sw_if_index[VLIB_RX] to the interface we received
   * the connection on (the local interface) */
  vnet_buffer (b)->sw_if_index[VLIB_RX] = vnet_buffer (b)->ip.rx_sw_if_index;

  /* Single buffer: current_length becomes the payload length.
   * Buffer chain: only the length of the remaining buffers is adjusted */
  if (PREDICT_TRUE (!(b->flags & VLIB_BUFFER_NEXT_PRESENT)))
    b->current_length = hdr->data_length;
  else
    b->total_length_not_including_first_buffer = hdr->data_length
      - b->current_length;

  return s;
}

/*
 * Check whether a session other than a listener already exists for the
 * local/remote endpoints in hdr within fib_index.
 *
 * Returns 1 if the lookup finds a session whose state is not
 * SESSION_STATE_LISTENING, 0 otherwise.
 */
static u8
udp_connection_is_accepted (u32 fib_index, session_dgram_hdr_t *hdr, u8 is_ip4)
{
  session_t *found;

  found = is_ip4 ?
	    session_lookup_safe4 (fib_index, &hdr->lcl_ip.ip4,
				  &hdr->rmt_ip.ip4, hdr->lcl_port,
				  hdr->rmt_port, TRANSPORT_PROTO_UDP) :
	    session_lookup_safe6 (fib_index, &hdr->lcl_ip.ip6,
				  &hdr->rmt_ip.ip6, hdr->lcl_port,
				  hdr->rmt_port, TRANSPORT_PROTO_UDP);

  if (!found)
    return 0;

  return found->session_state != SESSION_STATE_LISTENING;
}

/*
 * Common body of the udp4/udp6 input nodes, with and without session lookup.
 *
 * For every buffer in the frame: parse the headers and find the session,
 * dispatch on the session state (OPENED, READY/ACCEPTING, LISTENING, other),
 * enqueue the datagram where applicable, add a trace record if tracing is
 * enabled and account the outcome in a per-frame error count array.
 *
 * Once the frame is processed all buffers are freed, pending enqueue events
 * are flushed for this thread and the error counts are stored in the node
 * counters. Returns frame->n_vectors.
 */
always_inline uword
udp46_input_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
		    vlib_frame_t *frame, u8 is_ip4, u8 is_nolookup)
{
  clib_thread_index_t thread_index = vm->thread_index;
  u32 n_left_from, *from, *first_buffer;
  vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
  /* Per-frame outcome counts indexed by UDP_ERROR_* value */
  u16 err_counters[UDP_N_ERROR] = { 0 };

  from = first_buffer = vlib_frame_vector_args (frame);
  n_left_from = frame->n_vectors;
  vlib_get_buffers (vm, from, bufs, n_left_from);

  b = bufs;

  while (n_left_from > 0)
    {
      u32 error0 = UDP_ERROR_ENQUEUED;
      session_dgram_hdr_t hdr0;
      udp_connection_t *uc0;
      session_t *s0;

      s0 = udp_parse_and_lookup_buffer (b[0], &hdr0, is_ip4, is_nolookup);
      if (PREDICT_FALSE (!s0))
	{
	  error0 = UDP_ERROR_NO_LISTENER;
	  goto done;
	}

      if (s0->session_state == SESSION_STATE_OPENED)
	{
	  u8 queue_event = 1;
	  uc0 = udp_connection_from_transport (session_get_transport (s0));
	  uc0->sw_if_index = vnet_buffer (b[0])->sw_if_index[VLIB_RX];
	  if (uc0->flags & UDP_CONN_F_CONNECTED)
	    {
	      error0 = UDP_ERROR_CONNECTED;
	      /* Session owned by another thread: clone the connection and
	       * get a new session; otherwise just move the session to READY */
	      if (s0->thread_index != thread_index)
		{
		  /* uc0 clone may allow owner of s0 to grow its pool */
		  session_handle_t osh = session_handle (s0);
		  /*
		   * Clone the transport. It will be cleaned up with the
		   * session once we notify the session layer.
		   */
		  uc0 = udp_connection_clone_safe (s0->connection_index,
						   s0->thread_index);
		  ASSERT (session_index_from_handle (osh) == uc0->c_s_index);
		  /*
		   * Ask session layer for a new session.
		   */
		  session_dgram_connect_notify (&uc0->connection, osh, &s0);
		  /* No rx event is requested for this enqueue */
		  queue_event = 0;
		}
	      else
		s0->session_state = SESSION_STATE_READY;
	    }
	  udp_connection_enqueue (uc0, s0, &hdr0, thread_index, b[0],
				  queue_event, &error0);
	}
      else if (s0->session_state == SESSION_STATE_READY ||
	       s0->session_state == SESSION_STATE_ACCEPTING)
	{
	  uc0 = udp_connection_from_transport (session_get_transport (s0));
	  udp_connection_enqueue (uc0, s0, &hdr0, thread_index, b[0], 1,
				  &error0);
	}
      else if (s0->session_state == SESSION_STATE_LISTENING)
	{
	  uc0 = udp_connection_from_transport (session_get_transport (s0));
	  /* Listener flagged as connected: accept a new connection for this
	   * datagram. Otherwise the datagram is enqueued on the listener. */
	  if (uc0->flags & UDP_CONN_F_CONNECTED)
	    {
	      if (is_nolookup)
		{
		  /* Make sure connection was not already accepted */
		  if (udp_connection_is_accepted (
			vnet_buffer (b[0])->ip.fib_index, &hdr0, is_ip4))
		    {
		      error0 = UDP_ERROR_CREATE_EXISTS;
		      goto done;
		    }
		}
	      uc0 = udp_connection_accept (uc0, &hdr0, thread_index);
	      if (!uc0)
		{
		  error0 = UDP_ERROR_CREATE_SESSION;
		  goto done;
		}
	      /* Continue with the session of the accepted connection */
	      s0 = session_get (uc0->c_s_index, uc0->c_thread_index);
	      uc0->sw_if_index = vnet_buffer (b[0])->sw_if_index[VLIB_RX];
	      error0 = UDP_ERROR_ACCEPT;
	    }
	  udp_connection_enqueue (uc0, s0, &hdr0, thread_index, b[0], 1,
				  &error0);
	}
      else
	{
	  /* Any other session state: nothing is enqueued */
	  error0 = UDP_ERROR_NOT_READY;
	}

    done:
      if (PREDICT_FALSE (node->flags & VLIB_NODE_FLAG_TRACE))
	udp_trace_buffer (vm, node, b[0], s0, error0);

      b += 1;
      n_left_from -= 1;

      udp_inc_err_counter (err_counters, error0, 1);
    }

  /* Buffers are always freed here, whatever the per-packet outcome */
  vlib_buffer_free (vm, first_buffer, frame->n_vectors);
  session_main_flush_enqueue_events (TRANSPORT_PROTO_UDP, thread_index);
  udp_store_err_counters (vm, is_ip4, err_counters);
  return frame->n_vectors;
}

/* udp4-input node function: ip4 datagrams, session found by lookup */
VLIB_NODE_FN (udp4_input_node)
(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
{
  enum
  {
    is_ip4 = 1,
    is_nolookup = 0,
  };

  return udp46_input_inline (vm, node, frame, is_ip4, is_nolookup);
}

/*
 * Registration of the "udp4-input" internal node. It uses the shared udp
 * error counters and trace formatter; next nodes are generated from
 * foreach_udp_input_next.
 */
VLIB_REGISTER_NODE (udp4_input_node) =
{
  .name = "udp4-input",
  .vector_size = sizeof (u32),
  .format_trace = format_udp_input_trace,
  .type = VLIB_NODE_TYPE_INTERNAL,
  .n_errors = UDP_N_ERROR,
  .error_counters = udp_error_counters,
  .n_next_nodes = UDP_INPUT_N_NEXT,
  .next_nodes = {
#define _(s, n) [UDP_INPUT_NEXT_##s] = n,
      foreach_udp_input_next
#undef _
  },
};

/* udp4-input-nolookup node function: ip4 datagrams, session handle carried
 * in the buffer instead of being looked up */
VLIB_NODE_FN (udp4_input_nolookup_node)
(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
{
  enum
  {
    is_ip4 = 1,
    is_nolookup = 1,
  };

  return udp46_input_inline (vm, node, frame, is_ip4, is_nolookup);
}

/*
 * Registration of the "udp4-input-nolookup" internal node. It uses the
 * shared udp error counters and trace formatter; next nodes are generated
 * from foreach_udp_input_next.
 */
VLIB_REGISTER_NODE (udp4_input_nolookup_node) =
{
  .name = "udp4-input-nolookup",
  .vector_size = sizeof (u32),
  .format_trace = format_udp_input_trace,
  .type = VLIB_NODE_TYPE_INTERNAL,
  .n_errors = UDP_N_ERROR,
  .error_counters = udp_error_counters,
  .n_next_nodes = UDP_INPUT_N_NEXT,
  .next_nodes = {
#define _(s, n) [UDP_INPUT_NEXT_##s] = n,
      foreach_udp_input_next
#undef _
  },
};

/* udp6-input node function: ip6 datagrams, session found by lookup */
VLIB_NODE_FN (udp6_input_node)
(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
{
  enum
  {
    is_ip4 = 0,
    is_nolookup = 0,
  };

  return udp46_input_inline (vm, node, frame, is_ip4, is_nolookup);
}

/*
 * Registration of the "udp6-input" internal node. It uses the shared udp
 * error counters and trace formatter; next nodes are generated from
 * foreach_udp_input_next.
 */
VLIB_REGISTER_NODE (udp6_input_node) =
{
  .name = "udp6-input",
  .vector_size = sizeof (u32),
  .format_trace = format_udp_input_trace,
  .type = VLIB_NODE_TYPE_INTERNAL,
  .n_errors = UDP_N_ERROR,
  .error_counters = udp_error_counters,
  .n_next_nodes = UDP_INPUT_N_NEXT,
  .next_nodes = {
#define _(s, n) [UDP_INPUT_NEXT_##s] = n,
      foreach_udp_input_next
#undef _
  },
};

/* udp6-input-nolookup node function: ip6 datagrams, session handle carried
 * in the buffer instead of being looked up */
VLIB_NODE_FN (udp6_input_nolookup_node)
(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
{
  enum
  {
    is_ip4 = 0,
    is_nolookup = 1,
  };

  return udp46_input_inline (vm, node, frame, is_ip4, is_nolookup);
}

/*
 * Registration of the "udp6-input-nolookup" internal node. It uses the
 * shared udp error counters and trace formatter; next nodes are generated
 * from foreach_udp_input_next.
 */
VLIB_REGISTER_NODE (udp6_input_nolookup_node) =
{
  .name = "udp6-input-nolookup",
  .vector_size = sizeof (u32),
  .format_trace = format_udp_input_trace,
  .type = VLIB_NODE_TYPE_INTERNAL,
  .n_errors = UDP_N_ERROR,
  .error_counters = udp_error_counters,
  .n_next_nodes = UDP_INPUT_N_NEXT,
  .next_nodes = {
#define _(s, n) [UDP_INPUT_NEXT_##s] = n,
      foreach_udp_input_next
#undef _
  },
};

/*
 * fd.io coding-style-patch-verification: ON
 *
 * Local Variables:
 * eval: (c-set-style "gnu")
 * End:
 */
